
*************************************************************************************************
/*																								
	Purpose: Clean raw OCG 1973 data									
	Creates: ./output/OCG73_IGEanalysis.dta
*/																								
*************************************************************************************************

clear
set more off

* Relative paths below are resolved against the CPS 1962-1973 source folder
cd "$Mydirectory1/1_DataSources/CPS_1962_1973/"

*------------------------------------------------------------------------------*
* Load the raw OCG 1973 extract
*------------------------------------------------------------------------------*

use "./output/ocg1973.dta"

* Using CPS weight: keep only records whose weight is not system-missing
keep if weight_cps != .

***************
*** RESTRICT SAMPLE
***************

* Drop the foreign-born (birthplace codes: Appendix C in codebook)
	tabulate bpl, missing
	generate foreignborn = (bpl > 57) if !missing(bpl)
	label variable foreignborn "Respondent is foreignborn"
	* foreignborn is 0, 1 or missing; records with missing birthplace are retained
	drop if foreignborn == 1

* Keep respondents aged 30 through 50, bounds included
	rename age_ccm age
	label variable age "Age"
	keep if inrange(age, 30, 50)

***************
*** DEMOGRAPHICS
***************

* No state, just region
	rename region region4 
	* Label the variable under its new name. After the rename, "region" no longer
	* exists; the old spelling only resolved through variable-name abbreviation,
	* which fails under -set varabbrev off- or when another variable whose name
	* starts with "region" is present.
	label var region4 "Respondent region"

* Age, continued
	gen agesq = age * age
	label var agesq "Age squared"
	
* Marital status
	* NOTE: these three dummies are generated without an -if- qualifier, so a
	* missing marital_ccm is coded 0 (not missing) in each of them.
	gen married = marital_ccm==1 | marital_ccm==2
	tab married, m
	label var married "Respondent married"

	gen never_married = marital_ccm==5
	tab never_married, m
	label var never_married "Respondent never married"
	
	gen separated = marital_ccm==3
	tab separated, m 
	label var separated "Respondent separated"
	//note: Unable to code widowed or divorced dummies.
	
* Race
	* Code 3 is set to missing, so -black- below is missing for those respondents
	replace race_ccm=. if race_ccm==3
	rename race_ccm race
	label var race "Respondent race" 
	
	gen black = race==2 if race<.
	label var black "Black"
	
* Fatherforeign
	* Same cutoff (>57) as the respondent's foreign-born flag; missing stays missing
	gen fatherforeign = fbpl>57 if fbpl<.
	tab fatherforeign, m
	label var fatherforeign "Father foreign-born"
	
* Employment
	* Missing employment is coded 0 here (no -if- qualifier)
	gen employed = employment==1 | employment==2
	label var employed "Respondent employed"
	
* Union 
	gen unionR = unionmem==1 if unionmem<.
	label var unionR "Union member"

* Veteran
	tab veteranwar, m
	* Missing values sort above every number in Stata, so a missing veteranwar
	* fails the <6 test and is coded 0 (non-veteran)
	gen veteran = veteranwar<6 
	label var veteran "Veteran"
	
**********************
*** BIRTHPLACE
*********************
	
* Birthplace: code 57 is blanked out before any birthplace-based coding
	replace bpl = . if bpl == 57
	label variable bpl "Birthplace, FIPS"

* Born in the South. A missing birthplace matches none of the codes, so it is coded 0.
	generate bornsouth = inlist(bpl, 1, 5, 12, 13, 22, 28, 37, 40, 45, 47, 48, 51)
	label variable bornsouth "R is born in south"

* Census region of birth, assigned straight from the birthplace FIPS code
	generate region4_born = .
	* 1 = Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont,
	*     New Jersey, New York, Pennsylvania
	replace region4_born = 1 if inlist(bpl, 9, 23, 25, 33, 34, 36, 42, 44, 50)
	* 2 = Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota,
	*     Missouri, Nebraska, North Dakota, South Dakota
	replace region4_born = 2 if inlist(bpl, 17, 18, 19, 20, 26, 27, 29, 31, 38, 39, 46, 55)
	* 3 = South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina,
	*     South Carolina, Virginia, West Virginia, Alabama, Kentucky, Mississippi, Tennessee,
	*     Arkansas, Louisiana, Oklahoma, Texas
	replace region4_born = 3 if inlist(bpl, 1, 5, 10, 11, 12, 13, 21, 22, 24, 28, 37, 40, 45, 47, 48, 51, 54)
	* 4 = West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming,
	*     California, Oregon, Washington; codes 2 and 15 (Alaska and Hawaii under FIPS)
	*     are assigned here as well
	replace region4_born = 4 if inlist(bpl, 2, 4, 6, 8, 15, 16, 30, 32, 35, 41, 49, 53, 56)
	label variable region4_born "Region R born"
	
* Moved regions: current region differs from region of birth (missing when birth region unknown)
	generate moved_region = (region4 != region4_born) if !missing(region4_born)
	label variable moved_region "Moved Census region"
	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	
**********************
*** EDUCATION
*********************

* Years of schooling
	tabulate schl_ccm, missing
	rename schl_ccm yrsschool
	* Shift every code down by one (note: numbering is one off in codebook)
	replace yrsschool = yrsschool - 1
	tabulate yrsschool, missing nolabel
	
	* Correct value label: rebuild school1 so codes 0-18 carry the matching grade
	label define school1 0 "None", add modify
	forvalues g = 1/8 {
		label define school1 `g' "Elementary `g'", add modify
	}
	forvalues g = 1/4 {
		local code = 8 + `g'
		label define school1 `code' "High School `g'", add modify
	}
	forvalues g = 1/5 {
		local code = 12 + `g'
		label define school1 `code' "College `g'", add modify
	}
	label define school1 18 "College 6+", add modify
	label variable yrsschool "Years of school"
	tabulate yrsschool, missing
	
	* yrsschool is based on "highest grade attended"; step back one year where
	* grade_ccm==2 so that it reflects the grade R COMPLETED (never below 0)
	replace yrsschool = yrsschool - 1 if grade_ccm == 2 & yrsschool != 0
	tabulate yrsschool, missing nolabel

* Educational categories (0-6); stays missing when yrsschool is missing
	generate eduR = 0 if yrsschool == 0
	replace eduR = 1 if inrange(yrsschool, 1, 7)                 // some grade school
	replace eduR = 2 if yrsschool == 8                           // completed 8th grade
	replace eduR = 3 if inrange(yrsschool, 9, 11)                // some HS
	replace eduR = 4 if yrsschool == 12                          // 4 years of HS
	replace eduR = 5 if inrange(yrsschool, 13, 15)               // 1-3 years of college
	replace eduR = 6 if yrsschool >= 16 & !missing(yrsschool)    // 4 or more years of college (like BA)
		
	* Representative years of schooling for each band
	generate yrsschool_bin = 0 if yrsschool == 0
	replace yrsschool_bin = 6  if yrsschool >= 1 & yrsschool <= 7
	replace yrsschool_bin = 8  if yrsschool == 8
	replace yrsschool_bin = 10 if yrsschool > 8 & yrsschool < 12
	replace yrsschool_bin = 12 if yrsschool == 12
	replace yrsschool_bin = 14 if yrsschool > 12 & yrsschool < 16
	replace yrsschool_bin = 16 if yrsschool >= 16 & yrsschool < 20
	label variable yrsschool_bin "Years of school, binned"
	tabulate yrsschool_bin, missing
	
	* Attainment dummies; missing whenever eduR is missing
	generate hs_ed = (eduR >= 4) if !missing(eduR)
	generate coll_ed = (eduR >= 6) if !missing(eduR)
	label variable hs_ed "HS educated" 
	label variable coll_ed "Coll educated"
	
* Educational categories for father and mother (identical coding, father first)
	foreach p in dad mom {
		* "Dad" / "Mom", used only inside the variable labels
		local P = upper(substr("`p'", 1, 1)) + substr("`p'", 2, .)
		local yrs schl_`p'_ocg

		* Seven attainment categories; anything outside the listed codes stays missing
		gen edu_`p' = 0 if `yrs' == 0                    //none
		replace edu_`p' = 1 if inrange(`yrs', 1, 7)      //some grade school
		replace edu_`p' = 2 if `yrs' == 8                //completed 8th grade
		replace edu_`p' = 3 if inrange(`yrs', 9, 11)     //some HS
		replace edu_`p' = 4 if `yrs' == 12               //4 years of HS
		replace edu_`p' = 5 if inrange(`yrs', 13, 15)    //some college
		replace edu_`p' = 6 if inlist(`yrs', 16, 17)     //college
		label var edu_`p' "Educational categories for `p'"

		* binned: categories 0..6 map to 0, 6, 8, 10, 12, 14, 16 years
		gen edu_`p'_bin = .
		local cat = 0
		foreach binyrs in 0 6 8 10 12 14 16 {
			replace edu_`p'_bin = `binyrs' if edu_`p' == `cat'
			local ++cat
		}
		tab edu_`p'_bin, m
		label var edu_`p'_bin "Years of school from bins"

		* NOTE(review): built with "&", so these are 0 (not missing) when the
		* parent's education is missing; the respondent's hs_ed/coll_ed are
		* missing in that case. Confirm the difference is intended.
		gen `p'_hs_ed = edu_`p' >= 4 & edu_`p' < .
		gen `p'_coll_ed = edu_`p' >= 6 & edu_`p' < .
		label var `p'_hs_ed "`P' HS educated"
		label var `p'_coll_ed "`P' College educated"
	}

	* Give the raw parental schooling variables analysis-friendly names
	foreach a in dad mom {
		rename schl_`a'_ocg yrsschool_`a'
		tab yrsschool_`a', m
	}

************
* Siblings *
************

*# Brothers--already coded up in 1_ocg1973_clean.do. Name of variable: R_num_brothers
*# Sisters--already coded up in 1_ocg1973_clean.do. Name of variable: R_num_sisters

* Flag indeterminate (high) number of sisters--not available.

*# of siblings (Note: includes any step/half/adopted siblings)
	* Sum of sisters and brothers; when only one count is reported use that count;
	* missing when both are missing. The three cases are mutually exclusive.
	gen R_num_siblings = .
	replace R_num_siblings = R_num_brothers                 if R_num_sisters == . & R_num_brothers != .
	replace R_num_siblings = R_num_sisters                  if R_num_sisters != . & R_num_brothers == .
	replace R_num_siblings = R_num_sisters + R_num_brothers if R_num_sisters != . & R_num_brothers != .
	tabulate R_num_siblings, missing
	label variable R_num_siblings "# of R's siblings"

*****************
* Own fertility *
*****************

* # (living) boys--not available
* Flag indeterminate (high) # of boys--not available
* # (living) girls--not available
* Flag indeterminate (high) # of girls--not available
* # of living kids--not available	
* # of deceased kids--not available	
* # of kids ever--not available 
* Dummy: has R ever had kids--not available 
* Dummy: does R have kids right now--not available

*********************************
* # of persons in R's household *
*********************************

* # of kids (of any age) living in R's hh--not available
* # of children 0-17 living in R's hh--not available

* Total # of people living in R's household
	/* Correct household size for all Rs who report being in a 1-person hh
	   BUT who also report that they are married and the spouse is present
	   (marital_ccm==1): such households are set to 2 persons.
	*/
	replace R_hhsize_plusR = 2 if marital_ccm == 1 & R_hhsize_plusR == 1
	tabulate R_hhsize_plusR, missing

	* Household size excluding the respondent
	generate R_hhsize_minusR = R_hhsize_plusR - 1
	tabulate R_hhsize_minusR, missing
	
	label variable R_hhsize_minusR "total # of persons in R's hh (NOT including R)"
	label variable R_hhsize_plusR "total # of persons in R's hh (including R)"

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	
**************
*** FATHER'S OCCUPATION
**************

* Crosswalk 1970 Census occupation codes to the coarser ANES categories.
* The loop body runs twice: first for the respondent's own occupation (occ_ccm),
* then for the father's / head-of-household occupation (dad_occ1970). Each pass
* temporarily renames the occupation variable to census1970 (the merge key),
* merges the crosswalk, and renames the variable back before the pass ends.
foreach var of varlist occ_ccm dad_occ1970  {
	tab `var', m

	* census1970 is the key variable used in the crosswalk merge below
	rename `var' census1970 

* Crosswalk here the 1970 occupations to the ANES occs (merging adds one variable: fatheroccej)
* NOTE: the merged variable is called fatheroccej in BOTH passes; in the occ_ccm
* pass it holds the respondent's occupation and is renamed to occRej at the end.
	merge m:1 census1970 using ../Crosswalks/Crosswalk_1970Census_toANES.dta
	* Master-only rows must have a missing code, i.e. every non-missing
	* occupation code found a match in the crosswalk
	assert census1970==. if _merge==1	

	* Diagnostics on unmatched master rows
	tab census1970 if _merge==1, m 

	count if fatheroccej==. & _merge==1 
	tab census1970 if _merge==1 & fatheroccej==., m
	* Discard crosswalk rows that matched no respondent
	drop if _merge==2 
	drop _merge

* Replace fathers who are self-employed managers (currently coded as 28) with 21 if they are self-employed
	if "`var'"=="dad_occ1970" {
		replace fatheroccej=21 if fatheroccej==28 & (dad_clsswrk==3 | dad_clsswrk==4) /* Both 3 and 4 are types of self-employed */
		}

	* Same recode for the respondent's own occupation, using the respondent's class of worker
	if "`var'"=="occ_ccm" {
		replace fatheroccej=21 if fatheroccej==28 & (classwkr_ccm==3) /*Only 3 is self-employed */
		}

/*	Second pass only (dad_occ1970):
    Fix father occupation (fatheroccej--for fathers only), 
    Code mother occupation,
    Dummies for head of household when R was growing up.
*/	
	if "`var'"=="dad_occ1970" {
	
	* Keep an untouched copy of the merged occupation before it is edited
	clonevar fatheroccej_nochange = fatheroccej
	label var fatheroccej_nochange "father or head of hh occupation--unchanged since merge with ANES occs"
	
	//Fix fatheroccej
	* Blank the father's occupation when R_livewbothparents_age16 is 2 or missing
	* AND the head-of-household code is 2 or 4 (named "mother" and "otherfemale"
	* by the dummy renames further down)
	replace fatheroccej =. if (R_livewbothparents_age16==2 | R_livewbothparents_age16==.) & inlist(R_headofhh_age16,2,4) 
	tab fatheroccej,m 
	* Sanity check on the replace above
	assert fatheroccej==. if fatheroccej_nochange==. | (!inlist(R_livewbothparents_age16,1,.) & !inlist(R_headofhh_age16,1,3,.)) 

	//Mother occupation
	* The reported occupation is attributed to the mother in exactly the cases
	* where it was removed from fatheroccej above
	gen motheroccej = fatheroccej_nochange if ((R_livewbothparents_age16==. | R_livewbothparents_age16==2) & (R_headofhh_age16==2 | R_headofhh_age16==4))
	tab motheroccej,m 
	assert motheroccej==. if fatheroccej_nochange==. | (!inlist(R_livewbothparents_age16,.,2) & !inlist(R_headofhh_age16,2,4)) 
	
	//Dummies for head of household when R was growing up
	* -tab, gen()- creates one dummy per observed value, numbered in sorted order.
	* NOTE(review): the renames below assume exactly four observed values, ordered
	* father, mother, other male, other female -- confirm against the codebook.
	tab R_headofhh_age16, gen(headofhh_)
		
	ren headofhh_1 headofhh_father
	replace headofhh_father =1 if R_livewbothparents_age16==1 & headofhh_father==. //Note: code father as head of hh for Rs who lived with both parents when growing up.
	tab headofhh_father,m 
	
	ren headofhh_2 headofhh_mother
	ren headofhh_3 headofhh_othermale
	ren headofhh_4 headofhh_otherfemale
	
	* For Rs who lived with both parents and have no head-of-hh report, the
	* remaining dummies are set to 0 (the father dummy was set to 1 above)
	foreach name in mother othermale otherfemale {
		replace headofhh_`name' =0 if R_livewbothparents_age16==1 & headofhh_`name'==.
		tab headofhh_`name',m 
	}
			
	label var headofhh_father "Head of hh when R was growing up was R's father"
	label var headofhh_othermale "Head of hh when R was growing up was some other male (not R's father)"
	label var headofhh_otherfemale "Head of hh when R was growing up was some other female (not R's mother)"
	label var headofhh_mother "Head of hh when R was growing up was R's mother"

	//Alternate dummy for father as head of hh during R's childhood.  
	/*Note: When R reports occupation of a parent but does not report who R lived with at age 16, 
	        will assume that R lived with father.*/
	gen headofhh_father_imputed = headofhh_father
	* Diagnostics before the imputation
	tab fatheroccej_nochange if R_headofhh_age16==. & headofhh_father_imputed==., m 
	tab headofhh_father_imputed if fatheroccej_nochange!=. & R_headofhh_age16==.,m
	replace headofhh_father_imputed =1 if fatheroccej_nochange!=. & R_headofhh_age16==. 
	label var headofhh_father_imputed "Impute dad when parent occ != missing & no info about hh head at age 16"
	
	
	* Restore the original variable name (dad_occ1970)
	rename census1970 `var' 

	}

	* Rename variable for sons
	* First pass only: restore occ_ccm and move the crosswalked code to occRej,
	* which frees the name fatheroccej for the second pass's merge
	if "`var'"=="occ_ccm" {
		rename census1970 `var'
		rename fatheroccej occRej
	}
		
}	

	* Labels for the two crosswalked occupation variables
	label var occRej "Respondent occupation, coarsened"
	label var fatheroccej "Father occupation, coarsened"

*****************************************************************************************************
* DUMMIES FOR WHEN WE KNOW WHY DAD OR MOM DIDN'T WORK (I.E. WHY FATHEROCCEJ/MOTHEROCCEJ IS MISSING) *
*****************************************************************************************************
	
	* Father not working: 0 when a father occupation is coded; 1 when it is missing,
	* dad_occ1970 is one of 991/996/998, and the household codes point to the father
	* (lived with both parents = 1 or missing; head of hh = 1, 3 or missing).
	* Per the original note these occupation codes cover: unemployed persons, students,
	* father never worked/had no occupation/was unemployed or not in labor force, rentier.
	* NOTE(review): the mother's rule below also accepts code 997 -- confirm the
	* difference between the two lists is intended.
	gen father_notworking = 0 if fatheroccej != .
	replace father_notworking = 1 if fatheroccej == . & inlist(dad_occ1970, 991, 996, 998) ///
		& inlist(R_livewbothparents_age16, 1, .) & inlist(R_headofhh_age16, 1, 3, .)
	tabulate father_notworking, missing
	
	* Mother not working: same construction for cases where the reported occupation
	* is attributed to the mother (head of hh = 2 or 4)
	gen mother_notworking = 0 if motheroccej != .
	replace mother_notworking = 1 if motheroccej == . & inlist(dad_occ1970, 991, 996, 997, 998) ///
		& inlist(R_livewbothparents_age16, ., 2) & inlist(R_headofhh_age16, 2, 4)
	tabulate mother_notworking, missing
	
* Variable for father being either farm laborer or operator
	* 1 for occupation codes 71 or 81, 0 for any other code, missing when fatheroccej is missing
	gen fatherfarm = inlist(fatheroccej, 71, 81) if fatheroccej != .
	label variable fatherfarm "Father in farm occupation"


*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	
***************
*** FAMILY INCOME 
***************

* Logged Family income 
	* ln() of a zero or negative income is missing
	gen lnfaminc_nobin= ln(income_fam_fc) 
	gen lnHHinc = ln(income_hh_fc)

* Binned family income: each bracket [lower, upper) is replaced by one
* representative dollar value. Negative and missing incomes stay missing.
	gen fam_inc=.  
	replace fam_inc=750   if income_fam_fc>=0     & income_fam_fc<1000    // <1000 (bottom code)
	replace fam_inc=2000  if income_fam_fc>=1000  & income_fam_fc<3000
	replace fam_inc=4000  if income_fam_fc>=3000  & income_fam_fc<5000
	replace fam_inc=6250  if income_fam_fc>=5000  & income_fam_fc<7500
	replace fam_inc=8750  if income_fam_fc>=7500  & income_fam_fc<10000
	replace fam_inc=11250 if income_fam_fc>=10000 & income_fam_fc<12500
	replace fam_inc=13750 if income_fam_fc>=12500 & income_fam_fc<15000
	replace fam_inc=17500 if income_fam_fc>=15000 & income_fam_fc<20000
	replace fam_inc=22500 if income_fam_fc>=20000 & income_fam_fc<25000
	* Top code: 1.25 x the 25,000 threshold. The open-ended bracket must start at
	* 25,000, where the previous bracket ends; starting it at 30,000 left every
	* income in [25000, 30000) with a missing fam_inc.
	replace fam_inc=1.25*25000 if income_fam_fc>=25000 & income_fam_fc<.
	label var fam_inc "Family income, binned"
	
	* Flags for the bottom and top brackets (missing when fam_inc is missing)
	gen bottomcoded_son = fam_inc==750 if fam_inc<.
	gen topcoded_son = fam_inc==1.25*25000 if fam_inc<.
	label var bottomcoded_son "Respondent family income, bottom coded"
	label var topcoded_son "Respondent family income, top coded"

/* We turn fam_inc into 1950 dollars using the CPI: https://data.bls.gov/timeseries/CUUR0000SA0 */
	* The two CPI constants are stored as variables and remain in the saved dataset
	gen CPI1950 = 24.1
	gen CPI1972 = 41.8 //1972 is used because ocg is part of March CPS supplement.

	gen fam_inc_real =.
	replace fam_inc_real = fam_inc * (CPI1950/CPI1972) 
	label var fam_inc_real "Family income, in 1950 dollars"
	
	gen lnfaminc=ln(fam_inc_real)
	label var lnfaminc "Logged family income, binned and real"

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

***************
*** BIRTH COHORTS 
***************

	generate year = 1973
	label variable year "Survey year"
	
	* Year of birth: March supplement means we subtract one extra year
	generate dob = year - age - 1
	label variable dob "Year of birth"
	tabulate dob, missing

* Assign everyone the decade in which they were born (1920s, 1930s or 1940s);
* birth years outside 1920-1949 leave decade missing
	generate decade = .
	forvalues d = 1920(10)1940 {
		replace decade = `d' if inrange(dob, `d', `d' + 9)
	}
	label variable decade "Decade of birth"
	
* Generate dummies for each decade (decade_1, decade_2, ... in ascending order)
	tabulate decade, generate(decade_)
	
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	

***************
*** INTERACTIONS 
***************

	global institution_list "unionR veteran black hs_ed coll_ed"

* Demean the variables that we will use: for every variable in the list create
* <name>_dm = value minus the (unweighted) sample mean, and reuse the source label
	foreach v of global institution_list {
		summarize `v'
		generate `v'_dm = `v' - `r(mean)'
		local lbl : variable label `v'
		label variable `v'_dm "`lbl', demeaned"
	}
		
*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

***************
*** SAVE
***************

	* Rename unique identifier to a survey-specific name
	rename id id_ocg73
	label variable id_ocg73 "ID number (unique identifier)"

	* Report only; the script does not stop if duplicates are found
	duplicates report id_ocg73 //no duplicates reported

	compress
	rename weight_cps weight_ocg73
	* Identifier and weight first, rows ordered by identifier
	order id_ocg73 weight_ocg73
	sort id_ocg73
	save "./output/OCG73_IGEanalysis.dta", replace
	